# ------------------------------------------------------------------------------#
# Description: Create 
  # Table 8: Nonlinear Models with Control Function 
  # Figure 4: Number of peer adoptions
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author:      Daniel A. Brent | dab320@psu.edu
# Created:     2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
rm(list = ls())
gc()

# Load required packages -------------------------------------------------------
library(pacman)
p_load(data.table, fixest, modelsummary, ggplot2)
options(scipen = 999)

# Load adoption data -----------------------------------------------------------
load("data/adoptions/adoptions.RData")

# Merge peer adoption buckets --------------------------------------------------
load("data/bucket/peer_adopt_buckets_any.RData")
dat <- merge(adopt, peer.adopt.bucket.any, by = c("pin", "year"))
rm(adopt, peer.adopt.bucket.any)

load("data/bucket/peer_adopt_buckets_rg.RData")
dat <- merge(dat, peer.adopt.bucket.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.rg)

load("data/bucket/peer_adopt_buckets_any_rg.RData")
dat <- merge(dat, peer.adopt.bucket.any.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.any.rg)

load("data/bucket/peer_adopt_buckets_cs.RData")
dat <- merge(dat, peer.adopt.bucket.cs, by = c("pin", "year"))
rm(peer.adopt.bucket.cs)

load("data/bucket/peer_adopt_buckets_both.RData")
dat <- merge(dat, peer.adopt.bucket.both, by = c("pin", "year"))
rm(peer.adopt.bucket.both)

# Merge peer eligibility buckets -----------------------------------------------
load("data/bucket/peer_elig_buckets_cs.RData")
dat <- merge(dat, peer.buckets.cs, by = c("pin", "year"))
rm(peer.buckets.cs)

load("data/bucket/peer_elig_buckets_rg.RData")
dat <- merge(dat, peer.buckets.rg, by = c("pin", "year"))
rm(peer.buckets.rg)

# Merge total peers ------------------------------------------------------------
load("data/bucket/total_peers.RData")
dat <- merge(dat, total.peers, by = "pin")
rm(total.peers)

# Merge parcel data ------------------------------------------------------------
load("data/parcel_build/parcel_build.RData")
dat <- merge(dat, parcel.build[, .(pin, zip5, sub.area, sqft, lot, beds, baths,
                                   yearbuilt, val.land, val.improve,
                                   ren.pre90, ren.90.99, ren.00.09, ren.10,
                                   water.prob)], by = "pin")
rm(parcel.build)

# Merge census data ------------------------------------------------------------
load("data/census/all_parcels_census_2010.RData")
parcels.2010[, blk := as.numeric(paste0(block, blkgrp))]
dat <- merge(dat, parcels.2010[, .(pin, tract, blkgrp, block, blk)])
rm(parcels.2010)

# Scale peer adoption variables ------------------------------------------------
dat[, peer.adopt.any.100.scale     := peer.adopt.any.100 / 100]
dat[, peer.adopt.any.rg.100.scale  := peer.adopt.any.rg.100 / 100]
dat[, peer.adopt.rg.100.scale      := peer.adopt.rg.100 / 100]
dat[, peer.adopt.cs.100.scale      := peer.adopt.cs.100 / 100]
dat[, peer.adopt.both.100.scale    := peer.adopt.both.100 / 100]

# Keep post-2010 only 
dat <- dat[year > 2010]

# Create average eligibility variable ------------------------------------------
dat[, elig.peers.avg.100 := elig.peers.cs.100 + elig.peers.rg.100]

# Control Function Residuals ------------------------------------------
boot.samp <- unique(na.omit(dat[elig.cs == 1 & post.rainwise != 1, 
                                .(pin, year, blkgrp, adopt.any, peer.adopt.any.100,
                                  elig.peers.cs.100, elig.peers.avg.100, peers.100)]))

boot.samp[, peer.adopt.any.100.resid := feols(peer.adopt.any.100 ~ peers.100 + elig.peers.avg.100 | blkgrp + year, 
                                              cluster = 'blkgrp', data = boot.samp)$residuals]

# Probit and Probit^2 Models ------------------------------------------
m.probit <- feglm(adopt.any ~ peers.100 + peer.adopt.any.100 + peer.adopt.any.100.resid |
                    blkgrp + year,
                  cluster='blkgrp', lean = F, notes = F, fixef.rm = 'both', glm.iter = 100,
                  data=boot.samp,
                  family = binomial(link = 'probit'))

m.probit.me <- slopes(m.probit, variables = 'peer.adopt.any.100')

m.probit2 <- feglm(adopt.any ~ peers.100 + peer.adopt.any.100 + 
                     peer.adopt.any.100.resid + peer.adopt.any.100.resid^2|
                     blkgrp + year,
                   cluster='blkgrp', lean = F, notes = F, fixef.rm = 'both', glm.iter = 100,
                   data=boot.samp,
                   family = binomial(link = 'probit'))

m.probit2.me <- slopes(m.probit2, variables = 'peer.adopt.any.100')

# Bootstrap SEs for Table 8a --------------------------------------------------------------------------
load("data/bootstrap/bootstrap_nonlinear.RData")
m.probit.boot <- summary(m.probit)
m.probit.boot$coeftable[2, 2] <- sd(boots$probit)

m.probit2.boot <- summary(m.probit2)
m.probit2.boot$coeftable[2, 2] <- sd(boots$probit2)

setFixest_dict(c(peer.adopt.any.100 = "Peer Adoptions",
                 peer.adopt.any.100.resid = "$\\widehat{\\text{Residual}}$",
                 `I(peer.adopt.any.100.resid^2)` = "$\\widehat{\\text{Residual}^2}$"))

style_tex <- style.tex(var.title = "\\midrule", stats.title = "\\midrule")

etable(m.probit.boot, m.probit2.boot,
       drop = c("peers.100"),
       fitstat = ~ n,
       signif.code = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
       style.tex = style_tex)

etable(m.probit.boot, m.probit2.boot,
       drop = c("peers.100"),
       fitstat = ~ n,
       signif.code = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
       style.tex = style_tex,
       file = "output/tables/table_8a.tex", replace = TRUE)

# Average Marginal Effects for Table 8b ------------------------------------------
load('data/bootstrap/bootstrap_nonlinear_ame.RData')

tidy.me <- function(x, ...) {
  ret <- data.frame(
    term      = x$term,
    estimate  = x$estimate,
    std.error  = x$std.error,
    p.value = x$p.value)
  ret
}

glance.me <- function(x, ...) {
  ret <- data.frame(
    N =x$N)
  ret
}
# Probit and Probit^2 Models
m.probit.me.boot <- data.frame(
  'term' = 'peer.adopt.any.100',
  'N' = length(m.probit.me$rowid),
  'estimate' = mean(m.probit.me$estimate),
  'std.error' = sd(boots$probit_c),
  'statistic' = mean(m.probit.me$estimate)/sd(boots$probit_c),
  'p.value' = 2*pnorm(q=mean(m.probit.me$estimate)/sd(boots$probit_c), lower.tail=FALSE)
)

class(m.probit.me.boot) <- 'me'

m.probit2.me.boot <- data.frame(
  'term' = 'peer.adopt.any.100',
  N = length(m.probit2.me$rowid),
  estimate = mean(m.probit2.me$estimate),
  std.error = sd(boots$probit2_c),
  statistic = mean(m.probit2.me$estimate)/sd(boots$probit2_c),
  p.value = 2*pnorm(q=mean(m.probit2.me$estimate)/sd(boots$probit2_c), lower.tail=FALSE)
)
class(m.probit2.me.boot) <- 'me'

cm <- c('peer.adopt.any.100' = 'Peer Adoptions')
models <- list('(1)'= m.probit.me.boot,
               '(2)' = m.probit2.me.boot)

modelsummary(models,
             statistic = c("({std.error})"),
             gof_map = c('N'),
             coef_map = cm,
             fmt = 4,
             stars=c('*'=0.1, '**'=0.05, '***'=0.01))
options("modelsummary_format_numeric_latex" = "plain")

modelsummary(models,
             statistic = c("({std.error})"),
             gof_map = c('N'),
             coef_map = cm,
             fmt = 4,
             stars=c('*'=0.1, '**'=0.05, '***'=0.01),
             output = 'output/tables/table_8b.tex')

# Figure 6: Marginal Effects by # of Peer Adoptions --------------------------------------------------------------------------
load("data/bootstrap/bootstrap_num_adopt.RData")

boot.samp <- dat[elig.cs == 1 & post.rainwise == 0]
boot.samp[, peer.adopt.any.100.resids := feols(peer.adopt.any.100 ~ peers.100 + elig.peers.cs.100 + elig.peers.rg.100 |
                                                 blkgrp + year, data = boot.samp)$residuals]

boot.samp[, paste0("peer.adopt.any.100.", 1:6) := lapply(1:6, function(k) ifelse(peer.adopt.any.100 == k | (k == 6 & peer.adopt.any.100 >= 6), 1, 0))]

m100 <- feols(adopt.any * 100 ~ peer.adopt.any.100.1 + peer.adopt.any.100.2 + peer.adopt.any.100.3 +
                peer.adopt.any.100.4 + peer.adopt.any.100.5 + peer.adopt.any.100.6 +
                peer.adopt.any.100.resids + peers.100 | blkgrp + year,
              cluster = "blkgrp", data = boot.samp)

coef.plot <- data.table(
  num = c("1", "2", "3", "4", "5", "6+"),
  beta = m100$coefficients[1:6],
  se = c(sd(boots$a1), sd(boots$a2), sd(boots$a3), sd(boots$a4), sd(boots$a5), sd(boots$a6))
)

gg <- ggplot(coef.plot, aes(x = num, y = beta)) +
  geom_point(size = 6, color = "dodgerblue") +
  geom_errorbar(aes(ymin = beta - 1.96 * se, ymax = beta + 1.96 * se),
                linewidth = 2, width = 0.2, color = "dodgerblue") +
  xlab("Number of Peer Adoptions") + 
  ylab("Effect on Adoption (percentage points)") +
  theme_minimal(base_size = 30) +
  scale_y_continuous(limits = c(0, 1))

print(gg)

# Save
ggsave("output/figures/figure_5.png", gg, width = 150, height = 150, units = "mm", dpi = 150, scale = 2)
